perm filename PAPER.TEX[XET,DEK] blob sn#833360 filedate 1987-01-30 generic text, type C, neo UTF8
COMMENT ⊗   VALID 00052 PAGES
C REC  PAGE   DESCRIPTION
C00001 00001
C00008 00002	% Mixing right-to-left texts with left-to-right texts, by Knuth and MacKay
C00011 00003	\leftline{\bf Mixing right-to-left texts with left-to-right texts}
C00015 00004	\subsection Terminology and conventions.
C00017 00005	\subsection The simplest case.
C00019 00006	\subsection Alternating texts.
C00024 00007	\subsection An approach to implementation.
C00028 00008	\subsection Fixing bugs.
C00031 00009	\subsection L-chauvinism.
C00038 00010	\subsection Multi-level mixing.
C00044 00011	\subsection Conclusions.
C00045 00012	\bigbreak\leftline{\bf Appendix}\par\nobreak\medskip\noindent
C00049 00013	% macros copied from WEBMAC will be used for the rest of this paper!
C00066 00014	\M2. Here we should change the final introductory paragraph
C00067 00015	\M11. The \\{pool\_name} is changed so that \TeXXeT\
C00068 00016	\M161. Additional subroutines, to be defined later, are stuck into the
C00069 00017	\M208. A new command code is added at the end of the former list the
C00070 00018	\M209. We have to add 1 to the right-hand sides of all these definitions.
C00071 00019	\M585. The description of \.{DVI} commands is augmented by two new
C00073 00020	\M586. Two new definitions are needed:
C00074 00021	\M638. At the beginning of \\{ship\_out}, we will initialize a stack of
C00075 00022	\M639. At the end of \\{ship\_out}, we want to clear out the LR stack.
C00076 00023	\M649. The \\{hpack} routine is modified to keep an LR stack as it packages
C00078 00024	\M877. Similarly, the \\{post\_line\_break} should keep an LR stack,
C00080 00025	\M880. The new actions to be performed when broken lines are being packaged
C00082 00026	\M1090. We add `$\\{vmode}+\\{LR}$' as a new subcase after
C00083 00027	\M1196. Math-in-text will be formatted left-to-right, because two new
C00085 00028	\M1341. The new primitive operations put new kinds of whatsit nodes into
C00086 00029	\M1344. Here's where the new primitives get established.
C00088 00030	\M1346. The new primitives call for a new case of cases here.
C00089 00031	\kern-3pt\M1356. We also need to be able to display the newfangled whatsits.
C00090 00032	\kern-1pt\M1357, 1358. Copying and deleting the new nodes is easy, since they
C00091 00033	\M1360.  We used to \\{do\_nothing} here, but now we must \\{do\_something}:
C00092 00034	\M1366. \P$\X1366:Output the whatsit node \|p in an hlist\X\S$\6
C00093 00035	\M1376. Most of the changes have been saved up for the end, so that the
C00094 00036	\M1377. A number of routines are based on a stack of one-word nodes whose %
C00095 00037	\M1378. \P$\X1378:Declare functions needed for special kinds of nodes\X\S$\6
C00096 00038	\M1379. \P$\X1378:Declare functions needed for special kinds of nodes\X%
C00098 00039	\eject\M1380. \P$\X1380:Append a \\{begin\_L} to the tail of the current list\X%
C00099 00040	\M1381. \P$\X1381:Append an \\{end\_L} to the tail of the current list\X\S$%
C00100 00041	\M1382. When the stack-manipulation macros of this section are used below,
C00102 00042	\M1383. \P$\X1383:Insert LR nodes at the beginning of the current line\X\S$%
C00103 00043	\M1384. \P$\X1384:Adjust the LR stack based on LR nodes in this line\X\S$\6
C00104 00044	\M1385. We use the fact that \|q now points to the node with \.{\\rightskip}
C00106 00045	\M1386. \P$\X1386:Adjust the LR stack for the \\{hpack} routine\X\S$\6
C00107 00046	\M1387. \P$\X1387:Check for LR anomalies at the end of \\{hpack}\X\S$\6
C00109 00047	\M1388. \P$\X1388:Output a reflection instruction if the direction has
C00111 00048	% end of the WEAVE macros
C00112 00049	\bigbreak\leftline{\bf Final Important Note}\par\nobreak\medskip\noindent
C00114 00050	\bigbreak\leftline{\bf Bibliography}\par\nobreak
C00115 00051	\bigbreak\leftline{\bf Examples of Typesetting Practice}\par\medskip
C00118 00052	\bye
C00119 ENDMK
C⊗;
% Mixing right-to-left texts with left-to-right texts, by Knuth and MacKay
% (this is a preprocessed file that can be typeset with ordinary TeX)

% Page dimensions.
\hsize=6.5in \vsize=8.75in % TUGboat (cleared by BB, Jan 87)

% Fonts.  \revrm and \revtt are the fonts used for reflected text and for
% the reflected halves of the logos below; setting \hyphenchar to -1
% suppresses hyphenation in those fonts.
% NOTE(review): the [xet,dek] suffix looks like a site-specific file area --
% confirm before porting these \font commands to another installation.
\font\revrm=xbmc10[xet,dek] \hyphenchar\revrm=-1
\font\revtt=ttmc10[xet,dek] \hyphenchar\revtt=-1
\font\logo=logo10 % font used for the METAFONT logo
\font\tentex=cmtex10 % TeX extended character set (used in strings)
\font\eightrm=cmr8

% Logos and notation.
% \DVIIVD sets `DVI' in \tt, a hyphen in an \hbox, and `IVD' in \revtt.
% \TeXXeT joins \TeX and \XeT with a hyphen in an \hbox.
% \XeT sets X, a lowered E, and T in \revrm with negative kerns between them.
% \MF is the METAFONT logo with a discretionary break between its halves.
% \<text> encloses text in angle brackets inside an \hbox.
\def\DVIIVD{{\tt DVI\hbox{-}\revtt IVD}}
\def\TeXXeT{{\rm \TeX\hbox{-}\XeT}}
\def\XeT{{\revrm X\kern-.125em\lower.5ex\hbox{E}\kern-.1667em T}}
\def\MF{{\logo META}\-{\logo FONT}}
\def\<#1>{\leavevmode\hbox{$\langle$#1\/$\rangle$}} % syntactic quantity

% \subsection Title. begins a numbered subsection.  The argument is the text
% up to the first period that is followed by a space (or line end).  The
% macro issues \medbreak, starts an unindented paragraph, adds 1 to \secno,
% and prints the number and title in slanted type followed by a \quad, so
% the heading runs into the text that follows.
\def\subsection #1. {\medbreak\noindent
  \advance\secno by 1
  {\sl \the\secno.\enspace #1.}\quad}
% \secno is allocated after the definition above; that is fine because the
% register is only looked up when \subsection is used.
\newcount\secno

% |text| typesets `text' in the font \revrm with its items in reverse order.
% \reflect takes the first item as #1 and everything up to \tcelfer as #2.
% Unless #1 is the sentinel \empty, it first reflects #2 and then emits #1,
% so the items come out last-to-first.  Because #1 is an undelimited
% argument, a braced group counts as a single item.
\catcode`\|=\active
\def|#1|{{\revrm\reflect#1\empty\tcelfer}}
\def\reflect#1#2\tcelfer{\ifx#1\empty\else\reflect#2\tcelfer#1\fi}

% Displayed verbatim text: \begintt ... \endtt puts the enclosed lines, in
% typewriter type, into a \vbox inside a math display.
\def\begintt{$$\ttverbatim \parskip=0pt \ttfinish}
\chardef\other=12
% \ttverbatim opens a group in which the special characters listed below get
% category 12, spaces and line ends are obeyed, and \tt is selected.  The
% group is closed by whoever called \ttverbatim.
\def\ttverbatim{\begingroup
  \catcode`\\=\other \catcode`\{=\other \catcode`\}=\other
  \catcode`\$=\other \catcode`\&=\other \catcode`\#=\other
  \catcode`\%=\other \catcode`\~=\other \catcode`\_=\other
  \catcode`\↑=\other \catcode`\|=\other \catcode`\"=\other
  \obeyspaces \obeylines \tt}
% An active space (as produced under \obeyspaces) is defined to be a control space.
{\obeyspaces\gdef {\ }}
% \ttfinish is defined while | is the escape character and backslash is an
% ordinary character, so that its delimiter matches the characters of
% `\endtt' as they appear in verbatim text.  #1 is the rest of the line
% containing \begintt; #2 is the verbatim material, which goes into the \vbox.
{\catcode`\|=0 |catcode`|\=\other % | is temporary escape character
  |obeylines % end of line is active
  |gdef|ttfinish#1↑↑M#2\endtt{#1|vbox{#2}|endgroup$$}}
% In-line verbatim: the first @ calls \ttverbatim and then, inside that
% group, makes @ mean \endgroup, so the second @ ends the verbatim text.
\catcode`\@=\active \def@{\ttverbatim\let@=\endgroup}
% "n" sets \spacefactor to n; the text uses it next to reflected material
% (for example "3000" and "1500").
\catcode`\"=\active \def"#1"{{\spacefactor=#1}}
\leftline{\bf Mixing right-to-left texts with left-to-right texts}
\medskip
\leftline{\indent Donald E. Knuth and Pierre MacKay}
\bigskip
\noindent
\TeX\ was designed to produce documents that are read from left-to-right
and top-to-bottom, according to the conventions of English and other
Western languages.  If such documents are turned $90↑\circ$, they can also
be read from top-to-bottom and right-to-left, as in Japan. Another
$90↑\circ$ or $180↑\circ$ turn yields documents that are readable from
right-to-left and bottom-to-top, or from bottom-to-top and left-to-right,
in case a need for such conventions ever arises. However, \TeX\ as it
stands is not suitable for languages like Arabic or Hebrew, which are
right-to-left and top-to-bottom.

It would not be difficult to use \TeX\ for documents that are purely
Arabic or purely Hebrew, by essentially producing the mirror image
of whatever document is desired. A raster-oriented printing device
could easily be programmed to reflect the bits from right to left as it
puts them on the pages. (This is sometimes called ``T-shirt mode'',
because it can be used to make iron-on transfers that produce
readable T-shirt messages, when English language output is
transferred to cloth after being printed in mirror image.)

Complications arise, however, when left-to-right conventions are mixed with
right-to-left conventions in the same document. Consider
an Arabic/English dictionary, or a Bible commentary that quotes Hebrew,
or a Middle-Eastern encyclopedia that refers to Western names in
roman letters; such documents, and many others, must go both ways.

The purpose of this paper is to clarify the issues involved in
mixed-direction document production, from the standpoint of a
Western author or reader or software implementor.
We shall also consider changes to \TeX\ that will extend it to a
bidirectional formatting system.

\subsection Terminology and conventions.
Let us say for convenience that an {\it L-text\/} is textual material
that is meant to be read from left to right, and an {\it R-text\/}
is textual material that is meant to be read from right to left.
Similarly we might say that English and Spanish are L-languages, while
Arabic and Hebrew are R-languages.

In order to make this paper intelligible to English readers who are
unfamiliar with R-languages, we shall use ``reflected English'',
i.e., |English|, as an R-language. All texts in reflected
English will be typeset in
|Extended| |Bold| |Modern| |Computer| %%%
type, which is a reflected version of Computer Modern Bold Extended type.
To translate from English to |English| and back again, one simply
needs to reverse the order of reading. Both English and |English| are
pronounced in the same way, except that |English| should be spoken in
a louder and/or deeper voice, so that a listener can distinguish it.

\subsection The simplest case.
It's not difficult to typeset single R-language words in an L-text document.
\TeX\ will work fine if you never need to deal with R-texts of more
than one word at a time; all you have to do is figure out a macro that will
reverse isolated words.

Let's suppose that we want to type `@the@ @|English|@ @script@' in
order to typeset `the |English| script' with \TeX. All we need is a font for
|English|, called @xbmc10@, say, and the following macros:
\begintt
\font\revrm=xbmc10      \hyphenchar\revrm=-1
\catcode`\|=\active
\def|#1|{{\revrm\reflect#1\empty\tcelfer}}
\def\reflect#1#2\tcelfer{\ifx#1\empty\else\reflect#2\tcelfer#1\fi}
\endtt
(The @xbmc10@ font can be generated like @cmbx10@ with the
extra \MF\ statement
\begintt
extra_endchar := extra_endchar &
 "currentpicture:=currentpicture reflectedabout((.5[l,r],0),(.5[l,r],1))"
\endtt
added to the parameter file. It has the same character widths as @cmbx10@.)
\subsection Alternating texts.
But the simple approach sketched above does not work when there are
multiword R-text phrases, i.e.,
{\bf |phrases| |R-text| |multiword|}, %%%
embedded in an L-text document---because of the possibility
of line breaks, i.e.,
{\bf |breaks| |line| |of| |possibility| |the| |of| |because|}. %%%
For example, let's consider the problem of typesetting the following
paragraph:\footnote*{After Leonardo lost the use of his right hand,
he began to make lefthanded notes in mirror writing. Of course, he
actually wrote in |Italian| instead of |English|.}
\begintt
Leonardo da Vinci made a sweeping statement in his notebooks:
|``Let no one who is not a mathematician read my works.''|
In fact, he said it twice, so he probably meant it.
\endtt
Here are samples of the proper results, considering two different column widths:
$$\hbox to\hsize{\indent
\vtop{\hsize=250pt
Leonardo da Vinci made a sweeping statement in his
notebooks: {\bf|mathematician| |a| |not| |is| |who| |one| |no| |``Let| %%%
|works.''| |my| |read|}"3000" In fact, he said it twice, so he probably %%%
meant it.}
\hfil
\vtop{\hsize=170pt
Leonardo da Vinci made a sweep-
ing statement in his notebooks: {\bf|``Let| %%%
|mathemati-| |a| |not| |is| |who| |one| |no| %%%
|works.''| |my| |read| |cian|}"3000" In fact, he %%%
said it twice, so he probably meant it.}\indent
}$$
Notice that the R-text in each line is reflected; in particular,
a hyphen that has been inserted at the right of an R-segment
will appear at the left of that segment.

How can we get \TeX\ to do this? The best approach is probably to
extend the driver programs that produce printed output from
the @DVI@ files that \TeX\ writes, instead of trying to do
tricky things with \TeX\ macros. Then \TeX\ itself merely needs to
put special codes into the @DVI@ output files, in order to
tell the ``\thinspace\DVIIVD\thinspace'' drivers what to do.

For example, one idea that almost works is to put `@\special{R}@'
just before an R-text begins, and `@\special{L}@' just after it ends.
In other words, we can change the `@|@' macro in our earlier example
to the simple form
\begintt
\def|#1|{{\revrm\special{R}#1\special{L}}}
\endtt
which does not actually reverse the characters; we can also leave the
`@\hyphenchar@' of @\revrm@ at its normal value, so that R-texts
will be hyphenated. Line breaking will proceed in the normal way,
and it will be the job of the \DVIIVD\ driver program to reflect every
segment that it sees between an @R@ and an @L@.

Reflecting might involve arbitrary combinations of characters, rules, accents,
kerns, etc.; for example, the R-text might be in |fran{{\c c}}ais|,
or it might even refer to \XeT!
\subsection An approach to implementation.
In order to understand how \DVIIVD\ programs might do the required tasks,
we need to look into the information that \TeX\ puts into a @DVI@ file.
The basic idea is that whenever \TeX\ outputs an hbox or a vbox,
the @DVI@ file gets a `{\it push\/}' command, followed by various
commands to typeset the box contents, followed by a `{\it pop\/}' command.
Therefore we can try the following strategy:
\smallskip
\item{a)} Whenever `@\special{R}@' is found in the @DVI@ file,
remember the current horizontal position $h_0$ and vertical position $v_0$;
also remember the current location $p_0$ in the @DVI@ file.
Set $c\leftarrow0$. Then begin to skim the next @DVI@ instructions instead
of actually using them for typesetting; but
keep updating the horizontal and vertical page positions as usual.
\smallskip
\item{b)} When `@\special{L}@' is found in the @DVI@ file, stop
skimming instructions. Then typeset all instructions between $p_0$
and the current location, in mirror-reflected mode, as explained below.
\smallskip
\item{c)} When `{\it push\/}' occurs when skimming instructions,
increase $c$ by~1.
\smallskip
\item{d)} When `{\it pop\/}' occurs when skimming instructions,
there are two cases. If $c>0$, decrease $c$ by~1. (This `{\it pop\/}'
matches a previously skimmed `{\it push\/}'.) But if $c=0$, effectively
insert `@\special{L}@' at this point and `@\special{R}@' just
after the very next `{\it push\/}'.
\smallskip\noindent
The mirror-reflected mode for @DVI@ commands in positions $p_0$ to
$p_1$ in the @DVI@ file, beginning at $(h_0,v_0)$ and ending at
$(h_1,v_1)$, works like this: A character of width~$w$ whose box sits
on the baseline between $(h,v)$ and $(h+w,v)$ in normal mode should
be placed so that its box sits on the baseline between $(h'-w,v)$ and
$(h',v)$ in mirror mode, where $h'$ is defined by the equation
$$\leftline{\indent$h-h_0\;=\;h_1-h'$.}$$
Similarly, a rule of width~$w$ whose lower edge runs from
$(h,v)$ to $(h+w,v)$ in normal mode should run from
$(h'-w,v)$ to $(h',v)$ in mirror mode.
\subsection Fixing bugs.
We stated above that the approach just sketched will ``almost'' work.
But it can fail in three ways, when combined with the full generality of
\TeX. First, there might be material ``between the lines'' that is inserted by
@\vadjust@ commands; this material might improperly be treated as R-text.
Second, the suggested mechanism doesn't always find the correct left
edge of segments that are being reflected, since the reflection should not
always begin at the extreme left edge of a typeset line; it should begin
after the @\leftskip@ glue and before other initial spacing
due to things like accent positioning. Third, certain tricks that
involve `@\unhbox@' can make entire lines disappear from the
@DVI@ file; however, this problem is not as serious as the other two,
because people shouldn't be playing such tricks.

A much more reliable and robust scheme can be obtained by building
a specially extended version of \TeX, which puts matching special commands
into every line that has reflected material. It is not difficult to
add this additional activity to \TeX's existing line-breaking mechanism; the
details appear in an appendix below. When this change has been made,
parts (c) and~(d) of the \DVIIVD\ skimming algorithm can be eliminated,
since the case $c=0$ will never arise in part~(d).
\subsection L-chauvinism.
We have been discussing mixed documents as if they always consist of R-texts
inserted into L-texts; but people whose native script is right-to-left naturally
think of mixed documents as the insertion of L-texts into R-texts. In fact,
there are two ways to read every page of a document, one in which the eye
begins to scan each line at the left and one in which the eye begins to
scan each line at the right.

The Leonardo illustration above is an example of the first kind, and we shall
call it an {\it L-document}. To read a given line of an L-document,
you start at the left and read any L-text that you see. Whenever your
eyes encounter an R-character, they skim ahead to the end of the next
R-segment (i.e., until the next L-character, or until the end of the line,
whichever comes first); then you read the R-segment right-to-left, and
continue as before. The rules for reading an R-document are similar,
but with right and left reversed.

It's usually possible to distinguish an L-document from an R-document
because of the indentation on the first line of a paragraph and/or
the blank space on the last line. For example, the R-documents
that correspond to the two L-document settings of the paragraph about
Leonardo look like this:
$$\hbox to\hsize{\indent
\vtop{\hsize=250pt\noindent
Leonardo da Vinci made a sweeping statement in his\indent\break %%%
{\bf|mathematician| |a| |not| |is| |who| |one| |no| |``Let|} notebooks:\break %%%
In fact, he said it twice, so he probably"3000" {\bf|works.''| |my| |read|}\break %%%
\null\hfil meant it.\parfillskip=0pt}
\hfil
\vtop{\hsize=170pt\noindent
Leonardo da Vinci made a sweep-\indent\break %%%
|``Let| ing statement in his notebooks:\break %%%
{\bf|mathemati-| |a| |not| |is| |who| |one| |no|}\break %%%
In fact, he"3000" {\bf|works.''| |my| |read| |cian|}\break %%%
\null\hfil said it twice, so he probably meant it.\parfillskip=0pt}\indent
}$$
We can imagine that these R-documents were composed on an R-terminal
and processed by \XeT\ from an {\bf|ifle| |input|} % kludge for ligature fi!
that looks like this:

\begingroup \leftskip=0pt plus 1fill \let\tt=\revtt \rightskip=20pt
\begintt
teL`` |:skoobeton sih ni tnemetats gnipeews a edam icniV ad odranoeL|
ym daer naicitamehtam a ton si ohw eno on
|.ti tnaem ylbaborp eh os ,eciwt ti dias eh ,tcaf nI| ''.skrow
\endtt
\endgroup In this case it is the L-text, not the R-text,
that is enclosed in @|@'s.
(The reader is urged to study this example carefully;
there {\it is\/} |method| in't!)

A poet could presumably construct interesting poems
that have both L-meanings and R-meanings, when read as L-documents
and R-documents.

Notice that our examples from Leonardo have used boldface quotation marks
(i.e., the quotation marks of |English|), so that these marks belong to
the text being quoted. This may seem erroneous; but it is in fact a necessary
convention in documents that are meant to display no favoritism between
L-readers and R-readers, because it ensures that the quotation marks will
stay with the text being reflected. (See the examples of contemporary
typesetting at the end of this paper.)
If we had put the quotation marks into English rather than |English|, the
R-documents illustrated above would have looked very strange indeed:
$$\hbox to\hsize{\indent
\vtop{\hsize=250pt\noindent
Leonardo da Vinci made a sweeping statement in his\indent\break %%%
|mathematician| |a| |not| |is| |who| |one| |no| |Let|notebooks: ``\break %%%
''"3000" In fact, he said it twice, so he probably|works.| |my| |read|\break %%%
\null\hfil meant it.\parfillskip=0pt}
\hfil
\vtop{\hsize=170pt\noindent
Leonardo da Vinci made a sweep-\indent\break %%%
|Let|ing statement in his notebooks: ``\break %%%
|mathemati-| |a| |not| |is| |who| |one| |no|\break %%%
''"3000" In fact, he|works.| |my| |read| |cian|\break %%%
\null\hfil said it twice, so he probably meant it.\parfillskip=0pt}\indent
}$$
\subsection Multi-level mixing.
The problems of mixed R- and L-typesetting go deeper than this,
because there might be an L-text inside an R-text inside an L-text.
For example, we might want to typeset a paragraph whose \TeX\ source
file looks like this:
\begintt
\R{Alice} said, \R{``You think English is \L{`English written backwards'};
but to me, \L{English} is English written backwards.  I'm sure \L{Knuth}
and \L{MacKay} will both agree with me.''}  And she was right.
\endtt
An intelligent bidirectional reader will want this to be typeset as if it
were an R-document inside an L-document. In other words, the eyes
of such a reader will naturally scan some of the lines beginning at the left,
and some of them beginning at the right. Here are examples of the
desired output, set with two different line widths:
$$\hbox to\hsize{\indent
\vtop{\hsize=182pt
|Alice| said, {\bf|is| |English| |think| |``You| %%%
|me,| |to| |but|"1500" |;|{\rm`English written backwards'} %%%
|backwards.| |written| |English| |is| {\rm English} %%%
|both| |will| {\rm MacKay} |and| {\rm Knuth} |sure| |I'm| %%%
|me.''| |with| |agree|}"3000" And she was right. %%%
}
\hfil
\vtop{\hsize=200pt
|Alice| said, `En- {\bf|is| |English| |think| |``You| %%%
{\rm English}"1250" |me,| |to| |but|"1500" |;|{\rm glish written backwards'} %%%
|sure| |I'm|"3000" |backwards.| |written| |English| |is| %%%
|with| |agree| |both| |will| {\rm MacKay} |and| {\rm Knuth} %%%
|me.''|}"3000" And she was right. %%%
}\indent}\postdisplaypenalty=10000$$
(Look closely.)

Multi-level documents are inherently ambiguous. For example,
the right-hand setting above might be interpreted as the result of
\begintt
...\R{... I'm sure and \L{MacKay} will both agree with} Knuth \R{me.''}...
\endtt
and the left-hand setting would also result from a source file like this(!)
\begintt
\indent\R{``You think English is \L{said,} Alice
\L{`English}; but to me,} written backwards'
\R{written backwards.} \R{\L{English} is English}
\R{will both} MacKay \R{and} Knuth \R{I'm sure
\L{And she} agree with me.''} was right.
\endtt
except for slight differences in spacing due to \TeX's
``@spacefactor@'' for punctuation.

In general, we have @\R{\L{a}\L{b}}@$\null=\null$@ba@, hence any
permutation of the characters on each line is theoretically possible.
A reader has to figure out which of the different ways to parse each line
makes most sense. Yet there is unanimous agreement in Middle Eastern countries
that a mixture of L-document and R-document styles is preferable to
an unambiguous insistence on L-reading or R-reading throughout a
document, because it is so natural and because the actual ambiguities
arise rarely in practice. The quotation marks in the example above
make it possible to reconstruct the invisible {\tt\char`\\R}'s and
{\tt\char`\\L}'s; in this way an author can cooperate with a literate
reader so that the meaning is clear.

Multi-level texts arise not only when quotes are inside quotes or when
R-document footnotes or illustrations are attached to L-documents;
they also arise when mathematics is embedded in R-text. For example,
consider the \TeX\ source code
\begintt
The \R{English} version of `the famous identity $e↑{i\pi}+1=0$ due to Euler'
is \R{`the famous identity $e↑{i\pi}+1=0$ due to Euler'}.
\endtt
It should be typeset like this:
$$\hbox to\hsize{\indent
\vtop{\hsize=400pt
The |English| version of `the famous identity $e↑{i\pi}+1=0$ due to
Euler' is {\bf|famous| |`the| %%%
|Euler'| |to| |due| $e↑{i\pi}+1=0$ |identity|}. %%%
}
\hfil
}$$
An extension of \TeX\ called \TeXXeT, described in the appendix,
properly handles multi-level mixtures including math,
as well as the simpler case of alternating R-texts and L-texts.
\subsection Conclusions.
When right-to-left and left-to-right texts are mixed in the same
document, problems can arise that are more subtle than simple
examples might suggest. The difficulties can be overcome by
extending \TeX\ to \TeXXeT\ and by extending @DVI@ drivers to
\DVIIVD\ drivers. Neither of these extensions is extremely complex.
\bigbreak\leftline{\bf Appendix}\par\nobreak\medskip\noindent
The extensions to \TeX\ described here are designed to put the
hitherto-undefined operation codes 250 \hbox{(`{\it begin\_reflect\/}')}
and 251 (`{\it end\_reflect\/}') into the @DVI@ file, instead of
`@\special{R}@' and `@\special{L}@' as mentioned above, because
mixed-direction typesetting is important enough to deserve efficient
@DVI@ coding. The resulting output files are called \DVIIVD\ files.

The \TeX\ language is extended to have four new primitive operations,
\begintt
\beginL     \endL     \beginR     \endR
\endtt
which are supposed to nest like parentheses in each paragraph and
in each hbox. However, @\endL@ and @\endR@ should be omitted at the end
of a paragraph if they are supposed to take effect after the
@\parfillskip@ glue. (Thus, for example,
\begintt
\everypar{\kern-\parindent\beginR\indent}
\endtt
can be used to start a series of paragraphs that all follow the
conventions of an R-document. The last line of every such paragraph
will be flush right, filled at the left; the first line will be
indented at the right.)

These new operations each contribute a new sort of
``whatsit node'' to the current horizontal list; they are additional
cases of a \<horizontal command> as explained in the \TeX\ manual~[1].
The @\L@ and @\R@ macros in our multi-level example about |Alice|
can be defined as follows:
\begintt
\def\L{\afterassignment\moreL \let\next= }
\def\moreL{\bracetest \aftergroup\endL \beginL \rm}
\def\R{\afterassignment\moreR \let\next= }
\def\moreR{\bracetest \aftergroup\endR \beginR \revrm}
\def\bracetest{\ifcat\next{\else\ifcat\next}\fi
  \errmessage{Missing left brace has been substituted}\fi \bgroup}
\endtt

The remainder of this appendix gives complete details about
changes to the standard \TeX\ program~[2] that will convert it to
the extended system \TeXXeT. It is convenient to list these changes
in order by the @WEB@ section numbers in~[2], for every section that
is affected.
% macros copied from WEBMAC will be used for the rest of this paper!
% The WEBMAC definitions that follow are made inside a group.
% NOTE(review): the matching \endgroup is not visible in this part of the
% file -- confirm that it follows the WEAVE material.
\begingroup
% @ and " were made active earlier in this file; restore category 12 so that
% they can be used literally by the macros below (e.g. \.{@\&}, \char"7D).
\catcode`\@=\other
\catcode`\"=\other
\parskip 0pt % no stretch between paragraphs
\parindent 1em % for paragraphs and for the first line of Pascal text

% Font macros for the three kinds of identifiers in WEAVE output.  Each one
% takes a single argument and sets it in an \hbox.
%   \\{name}  multi-letter identifier, in italic with italic correction
%   \|x       one-letter identifier, set in math mode
%   \&{word}  reserved word, in boldface
% The third macro must be named \& with one parameter: \defin and the
% program listings call \&{...}, and \{ has to keep its ordinary meaning
% because \C uses it for the braces around Pascal comments.
\def\\#1{\hbox{\it#1\/\kern.05em}} % italic type for identifiers
\def\|#1{\hbox{$#1$}} % one-letter identifiers look a bit better this way
\def\&#1{\hbox{\bf#1\/}} % boldface type for reserved words
% \.{string} sets a string in the \tentex font inside an \hbox.  Within the
% argument the control symbols below are locally redefined so that each one
% yields the corresponding literal character (defined further down as \BS,
% \RQ, \LQ, ...).
\def\.#1{\hbox{\tentex % typewriter type for strings
  \let\\=\BS % backslash in a string
  \let\'=\RQ % right quote in a string
  \let\`=\LQ % left quote in a string
  \let\{=\LB % left brace in a string
  \let\}=\RB % right brace in a string
  \let\~=\TL % tilde in a string
  \let\ =\SP % space in a string
  \let\_=\UL % underline in a string
  \let\&=\AM % ampersand in a string
  #1}}
% Special characters outside of strings: each is taken from the \tt font
% by character code.
\def\#{\hbox{\tt\char`\#}} % parameter sign
\def\${\hbox{\tt\char`\$}} % dollar sign
\def\%{\hbox{\tt\char`\%}} % percent sign
\def\↑{\ifmmode\mathchar"222 \else\char`↑ \fi} % pointer or hat
% circumflex accents can be obtained from \↑↑D instead of \↑

% Characters that the \. macro substitutes inside strings.  Most are
% \chardef'd to the character's own code; \LQ, \RQ and \SP select positions
% in the \tt font instead.
\chardef\AM=`\& % ampersand character in a string
\chardef\BS=`\\ % backslash in a string
\chardef\LB=`\{ % left brace in a string
\def\LQ{{\tt\char'22}} % left quote in a string
\chardef\RB=`\} % right brace in a string
\def\RQ{{\tt\char'23}} % right quote in a string
\def\SP{{\tt\char`\ }} % (visible) space in a string
\chardef\TL=`\~ % tilde in a string
\chardef\UL=`\_ % underline character in a string

% Boxes of negative width, copied into a line to move left by one or two ems.
\newbox\bak \setbox\bak=\hbox to -1em{} % backspace one em
\newbox\bakk\setbox\bakk=\hbox to -2em{} % backspace two ems

% Layout codes that appear in WEAVE output as \1 ... \7.  \ind is changed
% with \global, so the indentation level survives the end of a group.
% \3 takes a digit d and inserts \penalty d0 (thus \37 gives \penalty70).
\newcount\ind % current indentation in ems
\def\1{\global\advance\ind by1\hangindent\ind em} % indent one more notch
\def\2{\global\advance\ind by-1} % indent one less notch
\def\3#1{\hfil\penalty#10\hfilneg} % optional break within a statement
\def\4{\copy\bak} % backspace one notch
\def\5{\hfil\penalty-1\hfilneg\kern2.5em\copy\bakk\ignorespaces}% optional break
\def\6{\ifmmode\else\par % forced break
  \hangindent\ind em\noindent\kern\ind em\copy\bakk\ignorespaces\fi}
\def\7{\Y\6} % forced break and a little extra space

% One-letter control sequences used by WEAVE output, first part.
\let\yskip=\smallskip
\def\to{\mathrel{.\,.}} % double dot, used only in math mode
% \note{phrase}text. sets `phrase text.' in \eightrm as a separate paragraph
% with hanging indentation; the text runs up to the next period.
\def\note#1#2.{\Y\noindent{\hangindent2em\baselineskip10pt\eightrm#1 #2.\par}}
% \startsection leaves Pascal mode (\Q) and prints the section number
% \modno in boldface at the start of an unindented paragraph.
\def\startsection{\Q\noindent\strut{\bf\modno.\quad}}
\def\defin#1{\global\advance\ind by 2 \1\&{#1 }} % begin `define' or `format'
\def\A{\note{See also}} % cross-reference for multiply defined section names
\def\B{\mathopen{\.{@\{}}} % begin controlled comment
% \C and \X may be called in or out of math mode; \XX is set up to leave
% math mode before the material and re-enter it afterwards when necessary.
\def\C#1{\ifmmode\gdef\XX{\null$\null}\else\gdef\XX{}\fi % Pascal comments
  \XX\hfil\penalty-1\hfilneg\quad$\{\,$#1$\,\}$\XX}
\def\D{\defin{define}} % macro definition
\def\E{\cdot10↑} % exponent in floating point constant
\def\F{\defin{format}} % format definition
\let\G=\ge % greater than or equal sign
\def\H#1{\hbox{\rm\char"7D\tt#1}} % hexadecimal constant
\let\I=\ne % unequal sign
\def\J{\.{@\&}} % TANGLE's join operation
\let\K=\gets % left arrow
\let\L=\le % less than or equal sign
% \Mn. begins section n: it records n in \modno, prints the heading, and
% opens an \iftrue that is closed by the \fi at the end of each section.
\outer\def\M#1.{\bigbreak\def\modno{#1}\startsection\ignorespaces\iftrue}
\def\O#1{\hbox{\rm\char'23\kern-.2em\it#1\/\kern.05em}} % octal constant
% \P switches to the settings used for program text: a stretchable and
% shrinkable \rightskip, a large space factor after semicolons, and penalty
% values of 10000 for \pretolerance and both hyphen penalties.  It also
% resets the indentation level \ind to 2 and then applies \1.
% \Q restores \rightskip=0pt and the smaller values shown.
\def\P{\rightskip=0pt plus 100pt minus 10pt % go into Pascal mode
  \sfcode`;=3000
  \pretolerance 10000
  \hyphenpenalty 10000 \exhyphenpenalty 10000
  \global\ind=2 \1\ \unskip}
\def\Q{\rightskip=0pt % get out of Pascal mode
  \sfcode`;=1500 \pretolerance 200 \hyphenpenalty 50 \exhyphenpenalty 50 }
% One-letter control sequences used by WEAVE output, second part.
% Note that \L (above) and \R are redefined here as math symbols for the
% rest of the group opened before these macros.
\let\R=\lnot % logical not
\let\S=\equiv % equivalence sign
\def\T{\mathclose{\.{@\}}}} % terminate controlled comment
\def\U{\note{This code is used in}} % cross-reference for uses of sections
\let\V=\lor % logical or
\let\W=\land % logical and
% \Xn:name\X prints a section name in angle brackets followed by its
% number n in \eightrm; like \C it works both in and out of math mode.
\def\X#1:#2\X{\ifmmode\gdef\XX{\null$\null}\else\gdef\XX{}\fi % section name
  \XX$\langle\,$#2{\eightrm\kern.5em#1}$\,\rangle$\XX}
% \Y ends the paragraph and adds \yskip.
\def\Y{\par\yskip}
\let\Z=\let % now you can \send the control sequence \Z
\def\){\hbox{\.{@\$}}} % sign for string pool check sum
\def\]{\hbox{\.{@\\}}} % sign for forced line break
% \={string} sets the string, via \., inside a ruled box.
\def\=#1{\kern2pt\hbox{\vrule\vtop{\vbox{\hrule
        \hbox{\strut\kern2pt\.{#1}\kern2pt}}
      \hrule}\vrule}\kern2pt} % verbatim string
\let\~=\ignorespaces

% \ellipsis typesets vertical dots, moved right by 5em, and ends the
% paragraph; it is used in the listings below.
% NOTE(review): presumably it marks places where lines of the original
% program are not shown -- confirm against the listings.
\def\ellipsis{\kern5em\smash{\vdots}\qquad\vbox to12pt{}\par}
% \hang starts an unindented paragraph whose later lines are indented by 3em.
\def\hang{\hangindent 3em\noindent\ignorespaces}
\M2. Here we should change the final introductory paragraph;
the new copy will explain that the present
program is actually `\TeXXeT', not `\TeX'. The \\{banner} string is
correspondingly redefined:

\Y\P\D \37$\\{banner}\S\.{\'This\ is\ TeX-XeT,\ Version\ 2.0\'}$%
\C{printed when \TeXXeT\ starts}\par
\fi
\M11. The \\{pool\_name} is changed so that \TeXXeT\
 can coexist happily with \TeX.

\Y\P
$\\{pool\_name}=\.{\'TeXformats:TEXXET.POOL\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \
\ \'}$;\C{string of length \\{file\_name\_size}; tells where the string pool
appears}\par
\fi
\M161. Additional subroutines, to be defined later, are stuck into the
program at this place.
\Y\P\X1378:Declare functions needed for special kinds of nodes\X\par
\fi
\M208. A new command code is added at the end of the former list; the
final definition is therefore replaced by two:

\Y
\P\D \37$\\{LR}=70$\C{text direction ( \.{\\beginL}, \.{\\beginR}, \.{\\endL}, %
\.{\\endR} )}\par
\P\D \37$\\{max\_non\_prefixed\_command}=70$\C{largest command code that can't
be \.{\\global}}\par
\fi
\M209. We have to add 1 to the right-hand sides of all these definitions.

\Y\P\D \37$\\{toks\_register}=71$\C{token list register ( \.{\\toks} )}\par
\noindent\ellipsis
\P\D \37$\\{set\_interaction}=100$\C{define level of interaction ( \.{%
\\batchmode}, etc.~)}\par
\P\D \37$\\{max\_command}=100$\C{the largest command code seen at \\{big%
\_switch}}\par
\fi
\M585. The description of \.{DVI} commands is augmented by two new
ones at the end:

\yskip\hang\\{begin\_reflect} 250. Begin a (possibly recursive) reflected
segment.

\yskip\hang\\{end\_reflect} 251. End a (possibly recursive) reflected segment.

\yskip\noindent Commands 250--255 are undefined in normal \.{DVI} files,
but 250 and 251 are permitted in the special `\DVIIVD' files
produced by this variant of \TeX.

When a \DVIIVD\ driver encounters a \\{begin\_reflect} command, it should skim
ahead (as previously described) until finding the matching \\{end\_reflect};
these will be properly nested with respect to each other and with
respect to \\{push} and \\{pop}. After skimming has located a segment of
material to be reflected, that segment should be re-scanned
and obeyed in mirror-image mode as
described earlier. The reflected segment might recursively involve
\\{begin\_reflect}/\\{end\_reflect} pairs that need to be reflected again.

\fi
\M586. Two new definitions are needed:

\Y\P\D \37$\\{begin\_reflect}=250$\C{begin a reflected segment (allowed in
\DVIIVD\ files only)}\par
\P\D \37$\\{end\_reflect}=251$\C{end a reflected segment (allowed in
\DVIIVD\ files only)}\par
\fi
\M638. At the beginning of \\{ship\_out}, we will initialize a stack of
\.{\\beginL} and \.{\\beginR} instructions that are currently in force;
this is called the LR stack, and it is maintained with the help of
two global variables called \\{LR\_ptr} and \\{LR\_tmp} that will be
defined later. The instructions inserted here (just before testing if
$\\{tracing\_output}>0$) say that on the outermost level we are typesetting
in left-to-right mode. The opening `\&{begin}' is replaced by:

\Y\P
\&{begin} \37$\\{LR\_ptr}\K\\{get\_avail}$;\5
$\\{info}(\\{LR\_ptr})\K0$;\C{\\{begin\_L\_code} at outer level}\par
\fi
\M639. At the end of \\{ship\_out}, we want to clear out the LR stack.
Thus, `\\{flush\_node\_list}$(p)$' is replaced by:

\Y\P
$\\{flush\_node\_list}(\|p)$;\5
\X1382:Flush the LR stack\X;\par
\fi
\M649. The \\{hpack} routine is modified to keep an LR stack as it packages
a horizontal list, so that errors of mismatched \.{\\beginL...\\endL}
and \.{\\beginR...\\endR} pairs can be detected and corrected.
Changes are needed here at the beginning of the procedure
and at the end.

\Y\P\4\&{function}\1\  \37$\\{hpack}(\|p:\\{pointer};\,\35\|w:\\{scaled};\,\35%
\|m:\\{small\_number})$: \37\\{pointer};\6
\ellipsis\6
\|b: \37\\{integer};\C{badness of the new box}\6
$\\{LR\_ptr},\39\\{LR\_tmp}$: \37\\{pointer};\C{for LR stack maintenance}\6
\\{LR\_problems}: \37\\{integer};\C{counts missing begins and ends}\2\6
\&{begin} \37$\\{LR\_ptr}\K\\{null}$;\5
$\\{LR\_problems}\K0$;\6
$\|r\K\\{get\_node}(\\{box\_node\_size})$;\6
\ellipsis\6
\4\\{common\_ending}: \37\X663:Finish issuing a diagnostic message for an
overfull or underfull hbox\X;\6
\4\\{exit}: \37\X1387:Check for LR anomalies at the end of \\{hpack}\X;\6
$\\{hpack}\K\|r$;\6
\&{end};\par
\fi
\M877. Similarly, the \\{post\_line\_break} routine should keep an LR stack,
so that it can output \.{\\endL} or \.{\\endR} instructions at the ends of
lines and \.{\\beginL} or \.{\\beginR} instructions at the beginnings of lines.
Changes occur at the beginning and the end of this procedure:

\Y\P
\4\&{procedure}\1\  \37$\\{post\_line\_break}(\\{final\_widow\_penalty}:%
\\{integer})$;\6
\ellipsis\6
\\{cur\_line}: \37\\{halfword};\C{the current line number being justified}\6
$\\{LR\_ptr},\39\\{LR\_tmp}$: \37\\{pointer};\C{for LR stack maintenance}\2\6
\&{begin} \37$\\{LR\_ptr}\K\\{null}$;\5
\X878:Reverse the links of the relevant passive nodes, setting \\{cur\_p} to
the first breakpoint\X;\6
\ellipsis\6
$\\{prev\_graf}\K\\{best\_line}-1$;\5
\X1382:Flush the LR stack\X;\6
\&{end};\par
\fi
\M880. The new actions to be performed when broken lines are being packaged
are accomplished by three new steps added to this section of the program.

\Y\P$\4\X880:Justify the line ending at breakpoint \\{cur\_p}, and append it
to the current vertical list, together with associated penalties and other
insertions\X\S$\6
\X1383:Insert LR nodes at the beginning of the current line\X;\6
\X1384:Adjust the LR stack based on LR nodes in this line\X;\6
\X881:Modify the end of the line to reflect the nature of the break and to
include \.{\\rightskip}; also set the proper value of \\{disc\_break}\X;\6
\X1385:Insert LR nodes at the end of the current line\X;\6
\X887:Put the \.{\\leftskip} glue at the left and detach this line\X;\6
\ellipsis\par\fi
\M1090. We add `$\\{vmode}+\\{LR}$' as a new subcase after
`$\\{vmode}+\\{ex\_space}$' here. This means that the new primitive operations
will become instances of what {\sl The \TeX book\/} calls a
\<horizontal command>.

\fi
\M1196. Math-in-text will be formatted left-to-right, because two new
`append' instructions are inserted into this section of the code.

\Y\P$\4\X1196:Finish math in text\X\S$\6
\&{begin} \37$\\{tail\_append}(\\{new\_math}(\\{math\_surround},\39%
\\{before}))$;\5
\X1380:Append a \\{begin\_L} to the tail of the current list\X;\6
$\\{cur\_mlist}\K\|p$;\5
$\\{cur\_style}\K\\{text\_style}$;\5
$\\{mlist\_penalties}\K(\\{mode}>0)$;\5
\\{mlist\_to\_hlist};\5
$\\{link}(\\{tail})\K\\{link}(\\{temp\_head})$;\6
\&{while} $\\{link}(\\{tail})\I\\{null}$ \1\&{do}\5
$\\{tail}\K\\{link}(\\{tail})$;\2\6
\X1381:Append an \\{end\_L} to the tail of the current list\X;\6
$\\{tail\_append}(\\{new\_math}(\\{math\_surround},\39\\{after}))$;\5
$\\{space\_factor}\K1000$;\5
\\{unsave};\6
\&{end}\par
\fi
\M1341. The new primitive operations put new kinds of whatsit nodes into
horizontal lists. Therefore two additional definitions are needed here:

\Y
\P\D \37$\\{LR\_node}=4$\C{\\{subtype} in whatsits that represent \.{\\beginL},
etc.}\par
\P\D \37$\\{LR\_type}(\#)\S\\{mem}[\#+1].\\{int}$\C{the sub-subtype}\par
\fi
\M1344. Here's where the new primitives get established.

\Y\P\D \37$\\{immediate\_code}=4$\C{command modifier for \.{\\immediate}}\par
\P\D \37$\\{begin\_L\_code}=0$\C{command modifier for \.{\\beginL}}\par
\P\D \37$\\{begin\_R\_code}=1$\C{command modifier for \.{\\beginR}}\par
\P\D \37$\\{end\_L\_code}=2$\C{command modifier for \.{\\endL}}\par
\P\D \37$\\{end\_R\_code}=3$\C{command modifier for \.{\\endR}}\par
\P\D \37$\\{begin\_LR}(\#)\S(\\{LR\_type}(\#)<\\{end\_L\_code})$\par
\P\D \37$\\{begin\_LR\_type}(\#)\S(\\{LR\_type}(\#)-\\{end\_L\_code})$\par
\Y\P$\4\X226:Put each of \TeX's primitives into the hash table\X\mathrel{+}\S$\6
$\\{primitive}(\.{"beginL"},\39\\{LR},\39\\{begin\_L\_code})$;\6
$\\{primitive}(\.{"beginR"},\39\\{LR},\39\\{begin\_R\_code})$;\6
$\\{primitive}(\.{"endL"},\39\\{LR},\39\\{end\_L\_code})$;\6
$\\{primitive}(\.{"endR"},\39\\{LR},\39\\{end\_R\_code})$;\6
$\\{primitive}(\.{"openout"},\39\\{extension},\39\\{open\_node})$;\6
\ellipsis\par\fi
\M1346. The new primitives call for a new case of cases here.

\Y\P
\4\\{LR}: \37\&{case} $\\{chr\_code}$ \1\&{of}\6
\4\\{begin\_L\_code}: \37$\\{print\_esc}(\.{"beginL"})$;\6
\4\\{begin\_R\_code}: \37$\\{print\_esc}(\.{"beginR"})$;\6
\4\\{end\_L\_code}: \37$\\{print\_esc}(\.{"endL"})$;\6
\4\&{othercases} \37$\\{print\_esc}(\.{"endR"})$\2\6
\&{endcases};\par\fi
\kern-3pt\M1356. We also need to be able to display the newfangled whatsits.

\Y\P\1
\4\\{LR\_node}: \37\&{case} $\\{LR\_type}(\|p)$ \1\&{of}\6
\4\\{begin\_L\_code}: \37$\\{print\_esc}(\.{"beginL"})$;\6
\4\\{begin\_R\_code}: \37$\\{print\_esc}(\.{"beginR"})$;\6
\4\\{end\_L\_code}: \37$\\{print\_esc}(\.{"endL"})$;\6
\4\&{othercases} \37$\\{print\_esc}(\.{"endR"})$\2\6
\&{endcases};\par\fi
\kern-1pt\M1357, 1358. Copying and deleting the new nodes is easy, since they
can be handled just like the \.{\\closeout} nodes already present.
We simply replace `\\{close\_node}' by
`\\{close\_node},\thinspace\\{LR\_node}' in these two sections.

\fi
\M1360.  We used to \\{do\_nothing} here, but now we must \\{do\_something}:

\Y\P$\X1360:Incorporate a whatsit node into an hbox\X\S$\6
\&{if} $\\{subtype}(\|p)=\\{LR\_node}$ \1\&{then}\5
\X1386:Adjust the LR stack for the \\{hpack} routine\X\2\par
\U section~651.\fi

\M1366. \P$\X1366:Output the whatsit node \|p in an hlist\X\S$\6
\&{if} $\\{subtype}(\|p)\I\\{LR\_node}$ \1\&{then}\5
$\\{out\_what}(\|p)$\6
\4\&{else} \X1388:Output a reflection instruction if the direction has
changed\X\2\par
\U section~622.\fi

\M1376. Most of the changes have been saved up for the end, so that the
section numbers of \TeX\ in [2] can be left unchanged. Now we come to the
real guts of this extension to mixed-direction texts.

First, we allow the new primitives in horizontal mode, but not in math mode:

\Y\P$\4\X1056:Cases of \\{main\_control} that build boxes and lists\X%
\mathrel{+}\S$\6
\4$\\{hmode}+\\{LR}$: \37\&{begin} \37$\\{new\_whatsit}(\\{LR\_node},\39%
\\{small\_node\_size})$;\5
$\\{LR\_type}(\\{tail})\K\\{cur\_chr}$;\5
\&{end};\6
\4$\\{mmode}+\\{LR}$: \37\\{report\_illegal\_case};\par
\fi
\M1377. A number of routines are based on a stack of one-word nodes whose %
\\{info}
fields contain either \\{begin\_L\_code} or \\{begin\_R\_code}. The top of the
stack is pointed to by \\{LR\_ptr}, and an auxiliary variable \\{LR\_tmp} is
available for stack manipulation.

\Y\P$\X13:Global variables\X\mathrel{+}\S$\6
\4$\\{LR\_ptr},\39\\{LR\_tmp}$: \37\\{pointer};\C{stack of LR codes and temp
for manipulation}\par
\fi
\M1378. \P$\X1378:Declare functions needed for special kinds of nodes\X\S$\6
\4\&{function}\1\  \37$\\{new\_LR}(\|s:\\{small\_number})$: \37\\{pointer};\6
\4\&{var} \37\|p: \37\\{pointer};\C{the new node}\2\6
\&{begin} \37$\|p\K\\{get\_node}(\\{small\_node\_size})$;\5
$\\{type}(\|p)\K\\{whatsit\_node}$;\5
$\\{subtype}(\|p)\K\\{LR\_node}$;\5
$\\{LR\_type}(\|p)\K\|s$;\5
$\\{new\_LR}\K\|p$;\6
\&{end};\par
\A section~1379.
\U section~161.\fi
\M1379. \P$\X1378:Declare functions needed for special kinds of nodes\X%
\mathrel{+}\S$\6
\4\&{function}\1\  \37$\\{safe\_info}(\|p:\\{pointer})$: \37\\{integer};\2\6
\&{begin} \37\&{if} $\|p=\\{null}$ \1\&{then}\5
$\\{safe\_info}\K-1$\ \&{else} $\\{safe\_info}\K\\{info}(\|p)$;\2\6
\&{end};\par
\fi
\eject\M1380. \P$\X1380:Append a \\{begin\_L} to the tail of the current list\X%
\S$\6
$\\{tail\_append}(\\{new\_LR}(\\{begin\_L\_code}))$\par
\U section~1196.\fi
\M1381. \P$\X1381:Append an \\{end\_L} to the tail of the current list\X\S$%
\6
$\\{tail\_append}(\\{new\_LR}(\\{end\_L\_code}))$\par
\U section~1196.\fi
\M1382. When the stack-manipulation macros of this section are used below,
variables \\{LR\_ptr} and \\{LR\_tmp} might be the global variables
declared above, or they might be local to \\{hpack} or \\{post\_line\_break}.

\Y\P\D \37$\\{push\_LR}(\#)\S$\1\6
\&{begin} \37$\\{LR\_tmp}\K\\{get\_avail}$;\5
$\\{info}(\\{LR\_tmp})\K\\{LR\_type}(\#)$;\5
$\\{link}(\\{LR\_tmp})\K\\{LR\_ptr}$;\5
$\\{LR\_ptr}\K\\{LR\_tmp}$;\6
\&{end}\2\par
\goodbreak
\P\D \37$\\{pop\_LR}\S$\1\6
\&{begin} \37$\\{LR\_tmp}\K\\{LR\_ptr}$;\5
$\\{LR\_ptr}\K\\{link}(\\{LR\_tmp})$;\5
$\\{free\_avail}(\\{LR\_tmp})$;\6
\&{end}\2\par
\Y\P$\4\X1382:Flush the LR stack\X\S$\6
\&{while} $\\{LR\_ptr}\I\\{null}$ \1\&{do}\5
\\{pop\_LR}\2\par
\U sections~639 and~877.\fi
\M1383. \P$\X1383:Insert LR nodes at the beginning of the current line\X\S$%
\6
\&{while} $\\{LR\_ptr}\I\\{null}$ \1\&{do}\6
\&{begin} \37$\\{LR\_tmp}\K\\{new\_LR}(\\{info}(\\{LR\_ptr}))$;\5
$\\{link}(\\{LR\_tmp})\K\\{link}(\\{temp\_head})$;\5
$\\{link}(\\{temp\_head})\K\\{LR\_tmp}$;\5
\\{pop\_LR};\6
\&{end}\2\par
\U section~880.\fi
\M1384. \P$\X1384:Adjust the LR stack based on LR nodes in this line\X\S$\6
$\|q\K\\{link}(\\{temp\_head})$;\6
\&{while} $\|q\I\\{cur\_break}(\\{cur\_p})$ \1\&{do}\6
\&{begin} \37\&{if} $\R\\{is\_char\_node}(\|q)$ \1\&{then}\6
\&{if} $\\{type}(\|q)=\\{whatsit\_node}$ \1\&{then}\6
\&{if} $\\{subtype}(\|q)=\\{LR\_node}$ \1\&{then}\6
\&{if} $\\{begin\_LR}(\|q)$ \1\&{then}\5
$\\{push\_LR}(\|q)$\6
\4\&{else} \&{if} $\\{LR\_ptr}\I\\{null}$ \1\&{then}\6
\&{if} $\\{info}(\\{LR\_ptr})=\\{begin\_LR\_type}(\|q)$ \1\&{then}\5
\\{pop\_LR};\2\2\2\2\2\2\6
$\|q\K\\{link}(\|q)$;\6
\&{end}\2\par
\U section~880.\fi
\M1385. We use the fact that \|q now points to the node with \.{\\rightskip}
glue.

\Y\P$\4\X1385:Insert LR nodes at the end of the current line\X\S$\6
\&{if} $\\{LR\_ptr}\I\\{null}$ \1\&{then}\6
\&{begin} \37$\|s\K\\{temp\_head}$;\5
$\|r\K\\{link}(\|s)$;\6
\&{while} $\|r\I\|q$ \1\&{do}\6
\&{begin} \37$\|s\K\|r$;\5
$\|r\K\\{link}(\|s)$;\6
\&{end};\2\6
$\|r\K\\{LR\_ptr}$;\6
\&{while} $\|r\I\\{null}$ \1\&{do}\6
\&{begin} \37$\\{LR\_tmp}\K\\{new\_LR}(\\{info}(\|r)+\\{end\_L\_code})$;\5
$\\{link}(\|s)\K\\{LR\_tmp}$;\5
$\|s\K\\{LR\_tmp}$;\5
$\|r\K\\{link}(\|r)$;\6
\&{end};\2\6
$\\{link}(\|s)\K\|q$;\6
\&{end}\2\par
\U section~880.\fi
\M1386. \P$\X1386:Adjust the LR stack for the \\{hpack} routine\X\S$\6
\&{if} $\\{begin\_LR}(\|p)$ \1\&{then}\5
$\\{push\_LR}(\|p)$\6
\4\&{else} \&{if} $\\{safe\_info}(\\{LR\_ptr})=\\{begin\_LR\_type}(\|p)$ \1%
\&{then}\5
\\{pop\_LR}\6
\4\&{else} \&{begin} \37$\\{incr}(\\{LR\_problems})$;\6
\&{while} $\\{link}(\|q)\I\|p$ \1\&{do}\5
$\|q\K\\{link}(\|q)$;\2\6
$\\{link}(\|q)\K\\{link}(\|p)$;\5
$\\{free\_node}(\|p,\39\\{small\_node\_size})$;\5
$\|p\K\|q$;\6
\&{end}\2\2\par
\U section~1360.\fi
\M1387. \P$\X1387:Check for LR anomalies at the end of \\{hpack}\X\S$\6
\&{if} $\\{LR\_ptr}\I\\{null}$ \1\&{then}\6
\&{begin} \37\&{while} $\\{link}(\|q)\I\\{null}$ \1\&{do}\5
$\|q\K\\{link}(\|q)$;\2\6
\1\&{repeat} \37$\\{link}(\|q)\K\\{new\_LR}(\\{info}(\\{LR\_ptr})+\\{end\_L%
\_code})$;\5
$\|q\K\\{link}(\|q)$;\5
$\\{LR\_problems}\K\\{LR\_problems}+10000$;\5
\\{pop\_LR};\6
\4\&{until}\5
$\\{LR\_ptr}=\\{null}$;\2\6
\&{end};\2\6
\&{if} $\\{LR\_problems}>0$ \1\&{then}\6
\&{begin} \37\\{print\_ln};\5
$\\{print\_nl}(\.{"\\endL\ or\ \\endR\ problem\ ("})$;\6
$\\{print\_int}(\\{LR\_problems}\mathbin{\&{div}}10000)$;\5
$\\{print}(\.{"\ missing,\ "})$;\6
$\\{print\_int}(\\{LR\_problems}\mathbin{\&{mod}}10000)$;\5
$\\{print}(\.{"\ extra"})$;\6
$\\{LR\_problems}\K0$;\5
\&{goto} \37\\{common\_ending};\6
\&{end}\2\par
\U section~649.\fi
\M1388. \P$\X1388:Output a reflection instruction if the direction has
changed\X\S$\6
\&{if} $\\{begin\_LR}(\|p)$ \1\&{then}\6
\&{begin} \37\&{if} $\\{safe\_info}(\\{LR\_ptr})\I\\{LR\_type}(\|p)$ \1\&{then}%
\6
\&{begin} \37\\{synch\_h};\5
\\{synch\_v};\5
$\\{dvi\_out}(\\{begin\_reflect})$;\6
\&{end};\2\6
$\\{push\_LR}(\|p)$;\6
\&{end}\6
\4\&{else} \&{if} $\\{safe\_info}(\\{LR\_ptr})=\\{begin\_LR\_type}(\|p)$ \1%
\&{then}\6
\&{begin} \37\\{pop\_LR};\6
\&{if} $\\{info}(\\{LR\_ptr})+\\{end\_L\_code}\I\\{LR\_type}(\|p)$ \1\&{then}\6
\&{begin} \37\\{synch\_h};\5
\\{synch\_v};\5
$\\{dvi\_out}(\\{end\_reflect})$;\6
\&{end};\2\6
\&{end}\6
\4\&{else} $\\{confusion}(\.{"LR"})$\2\2\par
\U section~1366.\fi
% end of the WEAVE macros
\endgroup
\bigbreak\leftline{\bf Final Important Note}\par\nobreak\medskip\noindent
The extensions to \TeX\ just described are ``upward compatible'' with
standard \TeX, in the sense that ordinary \TeX\ programs will
still run correctly (although more slowly) on \TeXXeT.
However, \TeXXeT\ must {\it not\/} be called a new version of `\TeX', even
though it runs all \TeX\ programs; the reason is, of course,
that \TeX\ will not run all \TeXXeT\ programs.

A name change is necessary to distinguish all programs that do not agree
precisely with the real \TeX. Anybody who runs a program called `\TeX'
should be able to assume that it will give identical results from
all its implementations.
\bigbreak\leftline{\bf Bibliography}\par\nobreak
% \ref[<label>] opens one bibliography entry: it inserts a \medskip, sets
% the bracketed label with \textindent, hangs the following lines by
% \parindent, and ignores the blanks after the closing bracket.
\def\ref[#1]{\medskip\textindent{[#1]}\hangindent=\parindent\ignorespaces}

\ref[1] Donald E. Knuth, {\sl The \TeX book}, Volume~A of
{\sl Computers \& Typesetting\/} (Reading, Mass.: Addison-Wesley, 1986).

\ref[2] Donald E. Knuth, {\sl \TeX: The Program}, Volume~B of
{\sl Computers \& Typesetting\/} (Reading, Mass.: Addison-Wesley, 1986).

\ref[3] Pierre MacKay, ``Typesetting problem scripts,''
{\sl Byte\/ \bf 11},\thinspace 2 (February 1986),  201--218.
\bigbreak\leftline{\bf Examples of Typesetting Practice}\par\medskip

% \\<n>. (a number terminated by a period and a space) starts the caption
% of example <n>: after \noindent, the bold number and period are set as
% the label argument of \textindent.
% NOTE(review): \\ is used for WEAVE identifiers earlier in the file;
% presumably that meaning was local to the group closed by \endgroup -- confirm.
\def\\#1. {\noindent\textindent{\bf#1. }}

\\1. From {\sl Textus\/ \bf5} (1966), p.~12; Magnes
Press, Hebrew University of Jerusalem. (Notice the Hebrew quotation
marks surrounding the Hebrew title in footnote~6.)

\vskip1.9in

\\2. Fragments from the third edition of William Wright's classic
nineteenth-century grammar of Arabic, volume~2, pages 295--297. (Notice the
page break
in the midst of right-to-left text, and some left-to-right brackets.)
\vfill\eject

\\3. From page 233 of the same book. Here right-reading texts are equated
with = signs; the left side of each equation is to be read first.

\vskip 1.7in

\\4. From {\sl Bulletin of the Iranian Mathematical Society\/ \bf8} (Tehran,
1978), p.~78L. (Left-to-right mathematics in right-to-left text.)

\vskip 1.5in

\\5. From {\sl Introduction to Mathematics\/} [|Lematematika| |Mavo|] by
Abraham~A. Fraenkel, vol.~1 (Jerusalem, 1942), p.~38. (Page numbers are
`96--90' because `90' and `96' are Hebrew numbers.)

\vskip 1.5in

\\6. Page 200 of the same book illustrates the difference between ellipses
`$\,\cdots\,$' in formulas and ellipses in the text. None of this book's
math-in-text is broken between lines.

\vskip 1.2in
\centerline{$\vdots$}
\bye